home
***
CD-ROM
|
disk
|
FTP
|
other
***
search
/
Tech Arsenal 1
/
Tech Arsenal (Arsenal Computer).ISO
/
tek-03
/
huffman2.zip
/
HUFFMAN2.BAS
< prev
next >
Wrap
BASIC Source File
|
1992-06-01
|
14KB
|
486 lines
'Huffman encoder
'by Rich Geldreich May 29th, 1992
'This program is in the public domain.
DEFINT A-Z
DECLARE SUB InitTree ()
DECLARE SUB MakeSortTable ()
DECLARE SUB CombineTree ()
DECLARE SUB CleanUpTree ()
DECLARE SUB WriteTree ()
DECLARE SUB SortDistribution2 ()
DECLARE SUB SortDistribution ()
DECLARE SUB GetDistribution ()
DECLARE SUB RecurseTree (Node)
DECLARE SUB FillBuffer ()
CONST True = -1, False = 0
CONST Null = -2
CONST BufferLength = 10000
CLEAR , , 10000
DIM SHARED Father(512) AS LONG, LeftSon(512), RightSon(512)
DIM SHARED Index(512), RealIndex, Used(255) AS LONG
DIM SHARED Pointer(255), HighestEntry
DIM SHARED Code(255, 40), CodeLength(255)
DIM SHARED CurrentLength, CurrentCode(40)
DIM SHARED Buffer$, Address, EndAddress, Bits(8), CurrentByte, CurrentBit
DIM SHARED BufferSeg
LOCATE , , 1
Bits:
DATA 1,2,4,8,16,32,64,128,256
'read the bit masks
RESTORE Bits
FOR A = 0 TO 8: READ Bits(A): NEXT
'initilize the tree
InitTree
'initlize the input buffer
Buffer$ = STRING$(BufferLength, 0)
EndAddress = 1: Address = 0
PRINT "Getting Distribution:";
'open input file
OPEN COMMAND$ FOR BINARY AS #1
'check to see if it exists
IF LOF(1) = 0 THEN
CLOSE #1
KILL COMMAND$
PRINT
PRINT COMMAND$; " not found"
END
END IF
'read the input file and gather the distribution of each character
GetDistribution
'make a sorting table
MakeSortTable
'sort the table with the Shell Metzer sort
SortDistribution
'combine the tree until there is only one node at the "top"
CombineTree
'work down the tree finding codes which represent each character
TopOfTree = Pointer(0)
CurrentLength = 0
RecurseTree TopOfTree
'for debugging: prints the code for each character
'FOR A = 0 TO 255
' IF Used(A) > 256 THEN
' PRINT A;
' FOR B = 0 TO CodeLength(A)
' PRINT Code(A, B);
' NEXT
' PRINT
' END IF
'NEXT
'END
'"cleans" the tree up so it can be sent as small as possible
CleanUpTree
CurrentByte = 0: CurrentBit = 0
RealIndex = RealIndex - 1
'open output file
OPEN "output.huf" FOR BINARY AS #2
'kill file if it already exists
IF LOF(2) <> 0 THEN
CLOSE #2
KILL "output.huf"
OPEN "output.huf" FOR BINARY AS #2
END IF
'put the header
A& = LOF(1)
PUT #2, , A& 'number of bytes in original file
PUT #2, , RealIndex 'number of nodes in tree
Top = Index(TopOfTree)
PUT #2, , Top 'top of tree
WriteTree 'writes the tree to the output file
'compresses the input file
PRINT : PRINT "Encoding...": PRINT : PRINT
Ypos = CSRLIN - 2
SEEK #1, 1
EndAddress = 1: Address = 0
'initilize the output buffer
A$ = STRING$(5000, 0)
A& = SADD(A$)
A& = A& - 65536 * (A& < 0)
OBufferSeg = VARSEG(A$) + (A& \ 16)
OAddress = (A& MOD 16)
OEndAddress = OAddress + 5000
Ostart = OAddress
'start compressing
FOR A& = 1 TO LOF(1)
'get a byte from the input file
Address = Address + 1
'if Address=EndBuffer then it's time to fill the input buffer
IF Address = EndAddress THEN FillBuffer
B = PEEK(Address)
'send out all of the bits that represent the input character
FOR C = 0 TO CodeLength(B)
IF Code(B, C) THEN
CurrentByte = CurrentByte * 2 OR 1 'send "1"
ELSE
CurrentByte = CurrentByte * 2 'send "0"
END IF
CurrentBit = CurrentBit + 1
'if CurrentBit=8 then we have a complete byte
IF CurrentBit = 8 THEN
DEF SEG = OBufferSeg
POKE OAddress, CurrentByte
OAddress = OAddress + 1
'if Oaddress=Oendaddress then it's time to flush the
'output buffer
IF OAddress = OEndAddress THEN
PUT #2, , A$
B& = SADD(A$)
B& = B& - 65536 * (B& < 0)
OBufferSeg = VARSEG(A$) + (B& \ 16)
OAddress = (B& MOD 16)
OEndAddress = OAddress + 5000
Ostart = OAddress
END IF
CurrentByte = 0: CurrentBit = 0
DEF SEG = BufferSeg
END IF
NEXT
'see if it's time to update screen
PrintCount = PrintCount + 1
IF PrintCount = 1024 THEN
PrintCount = 0
LOCATE Ypos, 1
PRINT "Bytes In:"; A&; (A& * 100&) \ LOF(1); "% "
B& = LOF(2) + OAddress - Ostart
PRINT "Bytes Out:"; B&; " "
PRINT "Compression:"; 100 - (B& * 100&) \ A&; "% ";
END IF
NEXT
'put whatever is left of the byte buffer into the output buffer
DO UNTIL CurrentBit = 8
CurrentByte = CurrentByte * 2
CurrentBit = CurrentBit + 1
LOOP
DEF SEG = OBufferSeg
POKE OAddress, CurrentByte
A$ = LEFT$(A$, OAddress + 1 - Ostart)
PUT #2, , A$
'report compression
LOCATE Ypos, 1
PRINT "Bytes In:"; LOF(1); SPACE$(16)
PRINT "Bytes Out:"; LOF(2); SPACE$(16)
PRINT "Overall Compression:"; 100 - (LOF(2) * 100&) \ LOF(1); "%"; SPACE$(16);
CLOSE
END
'"Cleans" up the tree so it can be sent.
SUB CleanUpTree
RealIndex = 0
FOR A = 0 TO 512
B& = Father(A)
IF B& <> Null THEN
IF B& < 256 THEN
IF Used(B&) > 256 THEN
Index(A) = RealIndex
RealIndex = RealIndex + 1
END IF
ELSEIF B& > 256 THEN
Index(A) = RealIndex
RealIndex = RealIndex + 1
END IF
END IF
NEXT
FOR A = 0 TO 512
B& = Father(A)
IF B& <> Null THEN
IF B& < 256 THEN
IF Used(B&) > 256 THEN
IF LeftSon(A) <> Null THEN
LeftSon(A) = Index(LeftSon(A))
END IF
IF RightSon(A) <> Null THEN
RightSon(A) = Index(RightSon(A))
END IF
END IF
ELSEIF B& > 256 THEN
IF LeftSon(A) <> Null THEN
LeftSon(A) = Index(LeftSon(A))
END IF
IF RightSon(A) <> Null THEN
RightSon(A) = Index(RightSon(A))
END IF
END IF
END IF
NEXT
END SUB
'Combines the tree until there is only one node at the top.
SUB CombineTree
Parents = HighestEntry + 1
DO UNTIL Parents = 1
'sort the current distribution
SortDistribution2
'find the lowest 2 entries
Lowest = Pointer(HighestEntry)
NextLowest = Pointer(HighestEntry - 1)
'find new frequency
NewFrequency& = Father(Lowest) + Father(NextLowest) - 256
'combine the two nodes
IF RightSon(Lowest) = Null AND RightSon(NextLowest) = Null THEN
Father(NextLowest) = NewFrequency&
RightSon(NextLowest) = LeftSon(Lowest)
Father(Lowest) = Null
Parents = Parents - 1
HighestEntry = HighestEntry - 1
ELSEIF RightSon(Lowest) = Null AND RightSon(NextLowest) <> Null THEN
Father(Lowest) = NewFrequency&
RightSon(Lowest) = NextLowest
Pointer(HighestEntry - 1) = Pointer(HighestEntry)
Parents = Parents - 1
HighestEntry = HighestEntry - 1
ELSEIF RightSon(Lowest) <> Null AND RightSon(NextLowest) = Null THEN
Father(NextLowest) = NewFrequency&
RightSon(NextLowest) = Lowest
Parents = Parents - 1
HighestEntry = HighestEntry - 1
ELSEIF RightSon(Lowest) <> Null AND RightSon(NextLowest) <> Null THEN
'search for new node
FOR A = 512 TO 0 STEP -1
IF Father(A) = Null THEN EXIT FOR
NEXT
Father(A) = NewFrequency&
LeftSon(A) = Lowest
RightSon(A) = NextLowest
HighestEntry = HighestEntry - 1
Pointer(HighestEntry) = A
Parents = Parents - 1
END IF
'loop until there is only one node at the top
LOOP
END SUB
'Fills the input buffer.
SUB FillBuffer
GET #1, , Buffer$
A& = SADD(Buffer$)
A& = A& - 65536 * (A& < 0)
BufferSeg = VARSEG(Buffer$) + (A& \ 16)
Address = (A& MOD 16)
EndAddress = Address + BufferLength
DEF SEG = BufferSeg
END SUB
'Scans the input file for it's distribution.
SUB GetDistribution
FOR A& = 1 TO LOF(1)
Address = Address + 1
IF Address = EndAddress THEN
FillBuffer
PRINT ".";
END IF
B = PEEK(Address) * 2
Father(B) = Father(B) + 1
NEXT
B = 0
FOR A = 0 TO 510 STEP 2
Used(B) = Father(A): B = B + 1
NEXT
END SUB
'Initilizes the tree.
SUB InitTree
B = 0
FOR A = 0 TO 510 STEP 2
Father(A) = 256
LeftSon(A) = A + 1
RightSon(A) = Null
Father(A + 1) = B
LeftSon(A + 1) = Null
RightSon(A + 1) = Null
B = B + 1
NEXT
END SUB
'Makes a sorting table.
SUB MakeSortTable
HighestEntry = 0
FOR A = 0 TO 510 STEP 2
IF Father(A) > 256 THEN
Pointer(HighestEntry) = A
HighestEntry = HighestEntry + 1
END IF
NEXT
HighestEntry = HighestEntry - 1
END SUB
'Recursize procedure to go down the tree and build up codes
'that represent each character.
SUB RecurseTree (Node)
'are we at a character?
IF Father(Node) < 256 THEN
'yup! we CurrentCode() has this character's bit sequence
Char = Father(Node)
FOR A = 0 TO CurrentLength - 1
Code(Char, A) = CurrentCode(A)
NEXT
CodeLength(Char) = CurrentLength - 1
END IF
'go to the left if there's something there
IF LeftSon(Node) <> Null THEN
CurrentCode(CurrentLength) = 1 'add "1" to the current code
CurrentLength = CurrentLength + 1
RecurseTree LeftSon(Node) 'go down
CurrentLength = CurrentLength - 1 'take "1" from the current code
END IF
'go to the right if there's something there
IF RightSon(Node) <> Null THEN
CurrentCode(CurrentLength) = 0 'add "0" to the current code
CurrentLength = CurrentLength + 1
RecurseTree RightSon(Node) 'got down
CurrentLength = CurrentLength - 1 'take "0" from the current code
END IF
END SUB
'A REAL Shell sort follows. It is much faster than the well-known one.
'Sorts the nodes according to the sorting table.
SUB SortDistribution
Offset = HighestEntry \ 2
DO
FOR I = 0 TO HighestEntry - Offset
IF Father(Pointer(I)) < Father(Pointer(I + Offset)) THEN
SWAP Pointer(I), Pointer(I + Offset)
CompareLow = I - Offset
CompareHigh = I
DO WHILE CompareLow >= 0
IF Father(Pointer(CompareLow)) < Father(Pointer(CompareHigh)) THEN
SWAP Pointer(CompareLow), Pointer(CompareHigh)
CompareHigh = CompareLow
CompareLow = CompareLow - Offset
ELSE
EXIT DO
END IF
LOOP
END IF
NEXT
Offset = Offset \ 2
LOOP WHILE Offset > 0
END SUB
'A simple bubble sort... used while combining the tree.
SUB SortDistribution2
DO
SwapFlag = False
FOR A = HighestEntry - 1 TO 0 STEP -1
IF Father(Pointer(A + 1)) > Father(Pointer(A)) THEN
SWAP Pointer(A + 1), Pointer(A)
SwapFlag = True
END IF
NEXT
LOOP WHILE SwapFlag
END SUB
'Writes the tree to disk.
SUB WriteTree
FOR A = 0 TO 512
B& = Father(A)
IF B& <> Null THEN
IF B& < 256 THEN
IF Used(B&) > 256 THEN
GOSUB SendOne
FOR C = 0 TO 7
IF (B& AND Bits(C)) > 0 THEN
GOSUB SendOne
ELSE
GOSUB SendZero
END IF
NEXT
END IF
ELSEIF B& > 256 THEN
GOSUB SendZero
IF LeftSon(A) <> Null THEN
GOSUB SendOne
Son = LeftSon(A)
FOR C = 0 TO 8
IF (Son AND Bits(C)) > 0 THEN
GOSUB SendOne
ELSE
GOSUB SendZero
END IF
NEXT
ELSE
GOSUB SendZero
END IF
IF RightSon(A) <> Null THEN
GOSUB SendOne
Son = RightSon(A)
FOR C = 0 TO 8
IF (Son AND Bits(C)) > 0 THEN
GOSUB SendOne
ELSE
GOSUB SendZero
END IF
NEXT
ELSE
GOSUB SendZero
END IF
END IF
END IF
NEXT
EXIT SUB
SendZero:
CurrentByte = CurrentByte * 2
CurrentBit = CurrentBit + 1
IF CurrentBit = 8 THEN
A$ = CHR$(CurrentByte)
PUT #2, , A$
CurrentByte = 0: CurrentBit = 0
END IF
RETURN
SendOne:
CurrentByte = CurrentByte * 2 OR 1
CurrentBit = CurrentBit + 1
IF CurrentBit = 8 THEN
A$ = CHR$(CurrentByte)
PUT #2, , A$
CurrentByte = 0: CurrentBit = 0
END IF
RETURN
END SUB